library(ggplot2)
library(readr)
library(here)
  1. Experiment with geom_point() using the mtcars data set. Try out different aesthetics with different variables. What do you notice?
  # Scatter plot of weight against horsepower; gear is mapped to colour as the
  # plain (numeric) column.
  ggplot(data = mtcars, mapping = aes(x = hp, y = wt, colour = gear)) +
    geom_point()

  # Same scatter plot, but gear is wrapped in factor() before it is mapped to
  # colour.
  ggplot(
    data = mtcars,
    mapping = aes(x = hp, y = wt, colour = factor(gear))
  ) +
    geom_point()

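Hence, we note that it makes a difference whether we map color to a numeric variable or to a (discrete) factor variable: a numeric variable is shown on a continuous color scale, whereas a factor gets one distinct color per level.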

  1. Use the mtcars data set and plot mpg vs. hp. Add a smoothing line to the plot.
  # mpg against hp, with a smoothing line (geom_smooth() defaults) drawn on top
  # of the points.
  ggplot(data = mtcars, mapping = aes(x = hp, y = mpg)) +
    geom_point() +
    geom_smooth()

  1. Add a smoothing function to the plot for each number of cylinders.
  # Mapping colour to factor(cyl) in the plot-level aesthetics applies it to
  # both layers, so points and smoother are coloured per cylinder count.
  ggplot(
    data = mtcars,
    mapping = aes(x = hp, y = mpg, colour = factor(cyl))
  ) +
    geom_point() +
    geom_smooth()

  1. Find out how to remove the confidence interval.
  # se = FALSE switches off the confidence band around each smoothing line.
  ggplot(
    data = mtcars,
    mapping = aes(x = hp, y = mpg, colour = as.factor(cyl))
  ) +
    geom_point() +
    geom_smooth(se = FALSE)

  1. Use a simple linear regression model and a quadratic regression model for smoothing.
    # Simple linear regression (lm) used as the smoother, one fit per colour
    # group.
    ggplot(
      data = mtcars,
      mapping = aes(x = hp, y = mpg, colour = as.factor(cyl))
    ) +
      geom_point() +
      geom_smooth(method = "lm")

  # Quadratic regression used as the smoother, one fit per colour group.
  # The squared term needs its own I() so that lm estimates separate
  # coefficients for x and x^2. Writing y ~ I(x + x^2) instead fits a single
  # coefficient on the combined regressor (x + x^2), which is not a quadratic
  # model.
  ggplot(mtcars, aes(hp, mpg, color = as.factor(cyl))) +
    geom_point() +
    geom_smooth(method = lm, formula = y ~ x + I(x^2))

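  1. Download the Titanic data set from Moodle and explore it with ggplot2: plot the number of survivors, break the counts down by sex and by passenger class, and compare the age distributions by survival status.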
  # Read the Titanic CSV; here::here() builds the path to the file.
  titanic <- readr::read_csv(
    file = here::here("SoSe_2022/ggplot2/titanic.csv")
  )
  # Bar chart counting the rows for each value of survived.
  ggplot(data = titanic, mapping = aes(x = survived)) +
    geom_bar()

  # Same bar chart with the bars filled by sex.
  ggplot(data = titanic, mapping = aes(x = survived, fill = sex)) +
    geom_bar()

  # Bars filled by sex, with one facet column per value of pclass.
  ggplot(data = titanic, mapping = aes(x = survived, fill = sex)) +
    geom_bar() +
    facet_grid(. ~ pclass)

  # Age densities compared by survival status.
  # survived is wrapped in factor() so that colour/fill define discrete groups
  # and one density curve is drawn per outcome. Mapping a numeric column
  # directly would not split the data into groups.
  # NOTE(review): presumably survived is read in as numeric 0/1 -- confirm
  # against the CSV; factor() is harmless if it is already discrete.
  ggplot(
    titanic,
    aes(x = age, colour = factor(survived), fill = factor(survived))
  ) +
    geom_density(alpha = 0.5)

  1. Try to answer the following questions about the mpg dataset (comes with ggplot2) using ggplot.
  # Scatter plot of hwy against displ for the mpg data set.
  ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
    geom_point()

  # Density of hwy per manufacturer; semi-transparent fills keep overlapping
  # curves visible.
  ggplot(data = mpg, mapping = aes(x = hwy, fill = manufacturer)) +
    geom_density(alpha = 0.75)

  # Density of hwy per year; year is wrapped in factor() before it is mapped
  # to fill.
  ggplot(data = mpg, mapping = aes(x = hwy, fill = factor(year))) +
    geom_density(alpha = 0.5)

  1. Compare the two data sets economics and economics_long (both come with ggplot2) with respect to their ease of use when working with ggplot2.
  # Long format: a single geom_line() layer draws every series, coloured by
  # the variable column.
  ggplot(
    data = economics_long,
    mapping = aes(x = date, y = value, colour = variable)
  ) +
    geom_line()

  # Wide format: each column needs its own geom_line() layer with a
  # hand-picked fixed colour (set outside aes()).
  ggplot(data = economics, mapping = aes(x = date)) +
    geom_line(mapping = aes(y = pop), colour = "darkgrey") +
    geom_line(mapping = aes(y = uempmed), colour = "blue") +
    geom_line(mapping = aes(y = pce), colour = "orange") +
    geom_line(mapping = aes(y = psavert), colour = "red") +
    geom_line(mapping = aes(y = unemploy), colour = "green")

ggplot2 likes long data. If you want to plot multiple variables with color coding and legend, then you should transform your data set into a format similar to economics_long. See tidyr for switching between long and wide data formats.

  1. Reproduce the plot created by the following code with ggplot.
    # Base-graphics reference plot: mpg against wt with the fitted regression
    # line and a shaded 95% confidence band.
    plot(mpg ~ wt, data = mtcars, xlab = "wt", ylab = "mpg", pch = 19,
         ylim = c(5, 35))
    mod <- lm(mpg ~ wt, data = mtcars)
    abline(mod, col = "red")

    # Confidence interval of the fit on a grid spanning the observed wt range.
    wt_new <- seq(min(mtcars$wt), max(mtcars$wt), by = 0.05)
    conf_interval <- predict(
      mod,
      newdata = data.frame(wt = wt_new),
      interval = "confidence",
      level = 0.95
    )

    # Set up the vertices of the polygon (for shading the CI): upper bound
    # left to right, then lower bound right to left.
    p <- cbind(
      c(wt_new, rev(wt_new)),
      c(conf_interval[, "upr"], rev(conf_interval[, "lwr"]))
    )
    polygon(p, col = adjustcolor("steelblue", alpha.f = 0.5))

    # Dashed outlines along the lower and upper bounds of the band.
    lines(wt_new, conf_interval[, "lwr"], col = "steelblue", lty = 2)
    lines(wt_new, conf_interval[, "upr"], col = "steelblue", lty = 2)

  # ggplot2 version: points plus an lm smoother with a red line and a
  # steelblue confidence band.
  ggplot(data = mtcars, mapping = aes(x = wt, y = mpg)) +
    geom_point(size = 2) +
    geom_smooth(method = "lm", colour = "red", fill = "steelblue")